import requests
from lxml import etree
import re


class CrawlData:
    """Fetch and parse fund-ranking data from the Eastmoney ranking endpoint.

    The endpoint (``data_url``) returns a JavaScript snippet containing a
    bracketed array of double-quoted, comma-separated fund records;
    :meth:`page_data` extracts those records into a list of field lists.
    """

    # Seconds to wait for the HTTP response; prevents a stalled connection
    # from hanging the caller forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, data_str=None):
        # Optional pre-fetched payload; not consumed by page_data() here —
        # presumably used by other methods outside this view.
        self.data_str = data_str
        # Minimal header set (UA only) — kept for requests that don't need cookies.
        self._css_header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
        }
        # Full header set including session cookie and referer required by the
        # ranking endpoint. NOTE(review): the hard-coded cookie is session data
        # captured from a browser and will eventually expire — verify it is
        # still accepted before relying on this scraper.
        self._default_header = {
            "Host": "fund.eastmoney.com",
            "Cookie": "qgqp_b_id=c3f0b071999c6bb7712329c25c64525c; _adsame_fullscreen_16928=1; st_si=97739381351278; st_asi=delete; ASP.NET_SessionId=lcjteoivh0mv3gwu50etal1w; st_pvi=61199487957634; st_sp=2021-11-17%2011%3A22%3A07; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=6; st_psi=20211224114937334-112200304021-8703716106",
            "Referer": "http://fund.eastmoney.com/data/fundranking.html",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
        }
        # Human-facing ranking page (used as Referer above).
        self.tiantian_url = "http://fund.eastmoney.com/data/fundranking.html"
        # AJAX handler that serves the actual ranking data. Note the date
        # range (sd/ed) and page size (pn=50) are baked into the query string.
        self.data_url = "https://fund.eastmoney.com/data/rankhandler.aspx?op=ph&dt=kf&ft=all&rs=&gs=0&sc=6yzf&st=desc&sd=2020-12-24&ed=2021-12-24&qdii=&tabSubtype=,,,,,&pi=1&pn=50&dx=1&v=0.5371245086126664"

    def page_data(self):
        """Download one page of fund-ranking data and parse it.

        Returns:
            list[list[str]]: one inner list of field strings per fund record.

        Raises:
            requests.HTTPError: if the endpoint responds with an error status.
            ValueError: if the response body has no bracketed data array.
        """
        response = requests.get(
            url=self.data_url,
            headers=self._default_header,
            timeout=self.REQUEST_TIMEOUT,  # fail fast instead of hanging
        )
        # Surface HTTP errors explicitly rather than parsing an error page.
        response.raise_for_status()
        content = response.text
        # The payload is JS, not JSON: grab the first [...] array, then every
        # double-quoted record inside it. Raw strings keep the regex escapes
        # valid (non-raw '\[' is an invalid string escape on modern Python).
        bracketed = re.findall(r'\[(.*?)\]', content)
        if not bracketed:
            raise ValueError("unexpected response format: no bracketed data array found")
        records = re.findall(r'"(.*?)"', bracketed[0])
        # Each record is a comma-separated string of fund fields.
        return [record.split(",") for record in records]
